In [ ]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt

Support Vector Machines


In [ ]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

digits = load_digits()
# scale pixel values to [0, 1] and make the task binary (even vs. odd digit)
X_train, X_test, y_train, y_test = train_test_split(digits.data / 16., digits.target % 2, random_state=2)
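
Since the target is digits.target % 2, this is a binary task (even vs. odd digit). As a quick sanity check (a sketch, not part of the original analysis), we can look at the class balance:

In [ ]:
np.bincount(y_train)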

In [ ]:
from sklearn.svm import LinearSVC, SVC
linear_svc = LinearSVC(loss="hinge").fit(X_train, y_train)
svc = SVC(kernel="linear").fit(X_train, y_train)

In [ ]:
np.mean(linear_svc.predict(X_test) == svc.predict(X_test))
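
Both models learn (approximately) the same linear decision function, so their predictions should agree on almost all test points. As an additional check (a sketch; the exact numbers depend on the random split), we can also compare their test accuracies directly:

In [ ]:
linear_svc.score(X_test, y_test), svc.score(X_test, y_test)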

Kernel SVMs

Predictions in a kernel SVM are made using the formula

$$ \hat{y} = \alpha_0 + \alpha_1 y_1 k(\mathbf{x}^{(1)}, \mathbf{x}) + \ldots + \alpha_n y_n k(\mathbf{x}^{(n)}, \mathbf{x}) > 0 $$

where the dual coefficients are constrained by

$$ 0 \leq \alpha_i \leq C $$

Radial basis function (Gaussian) kernel: $$k(\mathbf{x}, \mathbf{x'}) = \exp(-\gamma ||\mathbf{x} - \mathbf{x'}||^2)$$


In [ ]:
from sklearn.metrics.pairwise import rbf_kernel
line = np.linspace(-3, 3, 100)[:, np.newaxis]
kernel_value = rbf_kernel([[0]], line, gamma=1)
plt.plot(line, kernel_value.T)
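
The gamma parameter controls how quickly the kernel decays with distance: larger gamma means a narrower bump around the reference point. A small illustrative sketch (the particular gamma values are arbitrary):

In [ ]:
plt.figure()
for gamma in [0.1, 1, 10]:
    plt.plot(line, rbf_kernel([[0]], line, gamma=gamma).T, label="gamma = %g" % gamma)
plt.legend()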

In [ ]:
from plots import plot_svm_interactive
plot_svm_interactive()

In [ ]:
svc = SVC().fit(X_train, y_train)
svc.score(X_test, y_test)
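
To connect this back to the prediction formula above: a fitted SVC exposes its support vectors, dual coefficients (alpha_i * y_i) and intercept, so the decision function can be recomputed by hand with rbf_kernel. The snippet below is a sketch; it refits an SVC with an explicit gamma (0.1, chosen arbitrarily) so the same value can be passed to rbf_kernel:

In [ ]:
# Sketch: reconstruct the kernel-SVM decision function from the dual coefficients.
svc_rbf = SVC(kernel="rbf", gamma=0.1).fit(X_train, y_train)

K = rbf_kernel(X_test, svc_rbf.support_vectors_, gamma=0.1)
manual_decision = K @ svc_rbf.dual_coef_.ravel() + svc_rbf.intercept_

# should be True: the manual computation matches scikit-learn's decision_function
np.allclose(manual_decision, svc_rbf.decision_function(X_test))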

In [ ]:
Cs = [0.001, 0.01, 0.1, 1, 10, 100]
gammas = [0.001, 0.01, 0.1, 1, 10, 100]

from sklearn.model_selection import GridSearchCV

param_grid = {'C': Cs, 'gamma' : gammas}
grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(X_train, y_train)
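
After fitting, the grid search object stores the parameter combination with the best cross-validation score; it is worth inspecting before evaluating on the test set:

In [ ]:
grid_search.best_params_, grid_search.best_score_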

In [ ]:
grid_search.score(X_test, y_test)

In [ ]:
# We extract just the mean cross-validation scores
scores = grid_search.cv_results_['mean_test_score'].reshape(6, 6)

plt.matshow(scores)
plt.xlabel('gamma')
plt.ylabel('C')
plt.colorbar()
plt.xticks(np.arange(6), param_grid['gamma'])
plt.yticks(np.arange(6), param_grid['C']);

Exercises

  • Scale the data using StandardScaler before applying the SVC. How does the performance with the default parameters change? (A possible starting point is sketched after this list.)
  • Grid-search the parameters for the scaled data. How do they differ from the previous ones?
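
One possible starting point for the first exercise (a sketch, not the only solution): chain StandardScaler and SVC in a pipeline, so that the scaler is fit on the training data only.

In [ ]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# scale the features, then apply the (default-parameter) SVC
scaled_svc = make_pipeline(StandardScaler(), SVC())
scaled_svc.fit(X_train, y_train)
scaled_svc.score(X_test, y_test)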